# import necassary libraries
import pandas as pd
#load datasets
fuel_data = pd.read_csv('fuel_prices.csv', delimiter=';')
transport_data = pd.read_csv('road_transport.csv', delimiter=';')
transport_data.head()
| Goederenstromen | Perioden | 1000 kg | |
|---|---|---|---|
| 0 | Inkomend transport; totaal | 2007* | 103838966 |
| 1 | Inkomend transport; totaal | 2008* | 103405705 |
| 2 | Inkomend transport; totaal | 2009* | 89433416 |
| 3 | Inkomend transport; totaal | 2010* | 89935769 |
| 4 | Inkomend transport; totaal | 2011* | 91253760 |
fuel_data.head()
| Perioden | Benzine | Diesel | Lpg | |
|---|---|---|---|---|
| 0 | 2006 zondag 1 januari | 1,325 | 1,003 | 0,543 |
| 1 | 2006 maandag 2 januari | 1,328 | 1,007 | 0,542 |
| 2 | 2006 dinsdag 3 januari | 1,332 | 1,007 | 0,54 |
| 3 | 2006 woensdag 4 januari | 1,348 | 1,02 | 0,55 |
| 4 | 2006 donderdag 5 januari | 1,347 | 1,021 | 0,55 |
#only run this cell one time
#remove asterix from tansport data
transport_data['Perioden'] = transport_data['Perioden'].str.replace('*', '', regex=False)
#set perioden to datetime format year
transport_data['Perioden'] = pd.to_datetime(transport_data['Perioden'])
transport_data['Perioden'] = transport_data['Perioden'].dt.year
transport_data.head()
%store transport_data
Stored 'transport_data' (DataFrame)
#create dataframe for incomming transport
#filtering the data so only the value "inkomend transport; totaal" is given
transport_data_incoming = transport_data[(transport_data['Goederenstromen'] == 'Inkomend transport; totaal')].copy()
transport_data_incoming.reset_index(drop=True)
#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_incoming['Transport Inkomend Diff (%)'] = ((transport_data_incoming['1000 kg']
- transport_data_incoming['1000 kg'].iloc[0])
/ transport_data_incoming['1000 kg'].iloc[0]) * 100
transport_data_incoming.head()
%store transport_data_incoming
Stored 'transport_data_incoming' (DataFrame)
#create dataframe for inbound throughgoing transport
transport_data_inthrough = transport_data[(transport_data['Goederenstromen'] == 'Inkomende doorvoer; totaal')].copy()
#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_inthrough['Transport inbound throughgoing Diff (%)'] = ((transport_data_inthrough['1000 kg']
- transport_data_inthrough['1000 kg'].iloc[0])
/ transport_data_inthrough['1000 kg'].iloc[0]) * 100
transport_data_inthrough.head()
%store transport_data_inthrough
Stored 'transport_data_inthrough' (DataFrame)
#create dataframe for outbound throughgoing tranport
transport_data_outthrough = transport_data[(transport_data['Goederenstromen'] == 'Uitgaande doorvoer; totaal')].copy()
#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_outthrough['Transport outbound throughgoing Diff (%)'] = ((transport_data_outthrough['1000 kg']
- transport_data_outthrough['1000 kg'].iloc[0])
/ transport_data_outthrough['1000 kg'].iloc[0]) * 100
transport_data_outthrough.head()
%store transport_data_outthrough
Stored 'transport_data_outthrough' (DataFrame)
#create dataframe for outgoing tranport
transport_data_outgoing = transport_data[(transport_data['Goederenstromen'] == 'Uitgaand transport; totaal')].copy()
#Add percentage differences column with respect to the first year in the dataset (2007)
transport_data_outgoing['Transport outgoing Diff (%)'] = ((transport_data_outgoing['1000 kg']
- transport_data_outgoing['1000 kg'].iloc[0])
/ transport_data_outgoing['1000 kg'].iloc[0]) * 100
transport_data_outgoing.head()
%store transport_data_outgoing
Stored 'transport_data_outgoing' (DataFrame)
#only run this cell one time
from datetime import datetime
# Custom function to convert a date string to a Pandas datetime
def parse_date(date_str):
for i, month_name in enumerate(dutch_months):
date_str = date_str.replace(month_name, datetime(2000, i+1, 1).strftime('%B'))
for i, day_name in enumerate(dutch_days):
date_str = date_str.replace(day_name, datetime(2000, 1, i+1).strftime('%A'))
date_format = '%Y %A %d %B'
return pd.to_datetime(date_str, format=date_format)
# Define Dutch months and day names
dutch_months = ['januari', 'februari', 'maart', 'april', 'mei', 'juni', 'juli', 'augustus', 'september', 'oktober', 'november', 'december']
dutch_days = ['zondag', 'maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag']
# Apply the custom function to the "Perioden" column
fuel_data['Perioden'] = fuel_data['Perioden'].apply(parse_date)
#set perioden to datetime format year
fuel_data['Perioden'] = fuel_data['Perioden'].dt.year
#replace , to . and set to float
fuel_data['Benzine'] = fuel_data['Benzine'].str.replace(',', '.', regex=False)
fuel_data['Benzine'] = fuel_data['Benzine'].astype(float)
fuel_data['Diesel'] = fuel_data['Diesel'].str.replace(',', '.', regex=False)
fuel_data['Diesel'] = fuel_data['Diesel'].astype(float)
fuel_data['Lpg'] = fuel_data['Lpg'].str.replace(',', '.', regex=False)
fuel_data['Lpg'] = fuel_data['Lpg'].astype(float)
# Group by year and calculate the mean for all columns
fuel_data = fuel_data.groupby('Perioden').mean().reset_index()
# Remove years that are not in the transport dataset
years_to_remove = [2006, 2022, 2023]
# Remove rows for the specified years
fuel_data = fuel_data[~fuel_data['Perioden'].isin(years_to_remove)]
# Display the DataFrame with the converted dates
fuel_data.head()
%store fuel_data
Stored 'fuel_data' (DataFrame)
#Add percentage differences column between fuel prices with respect to the first year in the dataset (2007)
fuel_data['Benzine Price Diff (%)'] = ((fuel_data['Benzine'] - fuel_data['Benzine'].iloc[0]) /
fuel_data['Benzine'].iloc[0]) * 100
fuel_data['Diesel Price Diff (%)'] = ((fuel_data['Diesel'] - fuel_data['Diesel'].iloc[0]) /
fuel_data['Diesel'].iloc[0]) * 100
fuel_data['Lpg Price Diff (%)'] = ((fuel_data['Lpg'] - fuel_data['Lpg'].iloc[0]) /
fuel_data['Lpg'].iloc[0]) * 100
fuel_data.head()
%store fuel_data
Stored 'fuel_data' (DataFrame)
#import libraries
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# show price difference and transport difference in percentage in one plot
# Create a plotly figure
Figure = px.line()
# Add transport incoming difference % dataset
Figure.add_scatter(x=transport_data_incoming['Perioden'],
y=transport_data_incoming['Transport Inkomend Diff (%)'],
mode='lines+text',
name='Transport Inkomend Diff (%)',
text=transport_data_incoming['Transport Inkomend Diff (%)'].round(1).astype(str) + '%',
textposition="top center")
# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
Figure.add_scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines+text',
name=i,
text=fuel_data[i].round(1).astype(str) + '%',
textposition="top center")
# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure.update_layout(title='Percentage differences over time with respect to the first year',
xaxis_title='Year',
yaxis_title='Percentage Difference',
xaxis_range=[2007, 2022],
yaxis_range=[-50, 50],
height=500,
width=1000)
# Show the figure
Figure.show()
%store Figure
Stored 'Figure' (Figure)
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure2 = make_subplots(specs=[[{'secondary_y': True}]])
# Add the incoming transport
Figure2.add_trace(
go.Scatter(x=transport_data_incoming['Perioden'],
y=transport_data_incoming['1000 kg'],
mode='lines',
name='Incoming transport'), secondary_y=False)
# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
Figure2.add_trace(
go.Scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines',
name=i), secondary_y=True)
# Update the layout with axis labels
Figure2.update_layout(
title='Fuel price vs Incoming transport',
xaxis_title='Perioden',
yaxis_title='Transport in Ton',
yaxis2_title='Fuel Prices')
# Show the plot
Figure2.show()
%store Figure2
Stored 'Figure2' (Figure)
# show price difference and transport difference in percentage in one plot
# Create a plotly figure
Figure3 = px.line()
# Add transport inbound throughgoing difference % dataset
Figure3.add_scatter(x=transport_data_inthrough['Perioden'],
y=transport_data_inthrough['Transport inbound throughgoing Diff (%)'],
mode='lines+text',
name='Transport inbound throughgoing (%)',
text=transport_data_inthrough['Transport inbound throughgoing Diff (%)'].round(1).astype(str) + '%',
textposition="top center")
# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
Figure3.add_scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines+text',
name=i,
text=fuel_data[i].round(1).astype(str) + '%',
textposition="top center")
# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure3.update_layout(title='Percentage differences over time with respect to the first year',
xaxis_title='Year',
yaxis_title='Percentage Difference',
xaxis_range=[2007, 2022],
yaxis_range=[-50, 100],
height=500,
width=1000)
# Show the figure
Figure3.show()
%store Figure3
Stored 'Figure3' (Figure)
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure4 = make_subplots(specs=[[{'secondary_y': True}]])
# Add the inbound throughgoing transport
Figure4.add_trace(
go.Scatter(x=transport_data_inthrough['Perioden'],
y=transport_data_inthrough['1000 kg'],
mode='lines',
name='Inbound throughgoing transport'), secondary_y=False)
# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
Figure4.add_trace(
go.Scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines',
name=i), secondary_y=True)
# Update the layout with axis labels
Figure4.update_layout(
title='Fuel price vs Inbound throughgoing transport',
xaxis_title='Perioden',
yaxis_title='Transport in Ton',
yaxis2_title='Fuel Prices')
# Show the plot
Figure4.show()
%store Figure4
Stored 'Figure4' (Figure)
# show price difference and transport difference in percentage in one plot
# Create a plotly figure
Figure5 = px.line()
# Add transport outbound throughgoing difference % dataset
Figure5.add_scatter(x=transport_data_outthrough['Perioden'],
y=transport_data_outthrough['Transport outbound throughgoing Diff (%)'],
mode='lines+text',
name='Transport outbound throughgoing',
text=transport_data_outthrough['Transport outbound throughgoing Diff (%)'].round(1).astype(str) + '%',
textposition='top center')
# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
Figure5.add_scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines+text',
name=i,
text=fuel_data[i].round(1).astype(str) + '%',
textposition="top center")
# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure5.update_layout(title='Percentage differences over time with respect to the first year',
xaxis_title='Year',
yaxis_title='Percentage Difference',
xaxis_range=[2007, 2022],
yaxis_range=[-50, 50],
height=500,
width=1000)
# Show the figure
Figure5.show()
%store Figure5
Stored 'Figure5' (Figure)
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure6 = make_subplots(specs=[[{'secondary_y': True}]])
# Add the outbound throughgoing transport
Figure6.add_trace(
go.Scatter(x=transport_data_outthrough['Perioden'],
y=transport_data_outthrough['1000 kg'],
mode='lines',
name='Outbound throughgoing transport'), secondary_y=False)
# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
Figure6.add_trace(
go.Scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines',
name=i), secondary_y=True)
# Update the layout with axis labels
Figure6.update_layout(
title='Fuel price vs Outbound throughgoing transport',
xaxis_title='Perioden',
yaxis_title='Transport in Ton',
yaxis2_title='Fuel Prices')
# Show the plot
Figure6.show()
%store Figure6
Stored 'Figure6' (Figure)
# show price difference and transport difference in percentage in one plot
# Create a plotly figure
Figure7 = px.line()
# Add transport outgoing difference % dataset
Figure7.add_scatter(x=transport_data_outgoing['Perioden'],
y=transport_data_outgoing['Transport outgoing Diff (%)'],
mode='lines+text',
name='Transport outgoing',
text=transport_data_outgoing['Transport outgoing Diff (%)'].round(1).astype(str) + '%',
textposition='top center')
# Add the fuel prices, for three different columns benzine, diesel and lpg
for i in ['Benzine Price Diff (%)', 'Diesel Price Diff (%)', 'Lpg Price Diff (%)']:
Figure7.add_scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines+text',
name=i,
text=fuel_data[i].round(1).astype(str) + '%',
textposition="top center")
# Update the layout of the figure, add a title, x-axis title, and y-axis title
Figure7.update_layout(title='Percentage differences over time with respect to the first year',
xaxis_title='Year',
yaxis_title='Percentage Difference',
xaxis_range=[2007, 2022],
yaxis_range=[-50, 50],
height=500,
width=1000)
# Show the figure
Figure7.show()
%store Figure7
Stored 'Figure7' (Figure)
#plot fuel prices and incoming transport in 1 figure
#Make sure the graph can have two y-axis
Figure8 = make_subplots(specs=[[{'secondary_y': True}]])
# Add the outgoing transport
Figure8.add_trace(
go.Scatter(x=transport_data_outgoing['Perioden'],
y=transport_data_outgoing['1000 kg'],
mode='lines',
name='Outgoing transport'), secondary_y=False)
# add the fuel prices
for i in ['Benzine', 'Diesel', 'Lpg']:
Figure8.add_trace(
go.Scatter(x=fuel_data['Perioden'],
y=fuel_data[i],
mode='lines',
name=i), secondary_y=True)
# Update the layout with axis labels
Figure8.update_layout(
title='Fuel price vs Outgoing transport',
xaxis_title='Perioden',
yaxis_title='Transport in Ton',
yaxis2_title='Fuel Prices')
# Show the plot
Figure8.show()
%store Figure8
Stored 'Figure8' (Figure)